from pathlib import Path
from pprint import pprint as pp
from datetime import datetime
from urllib.parse import urlparse

import re
import pandas as pd

######################################
## REQUIRE MINIMUM SNAKEMAKE VERSION
######################################

from snakemake.utils import min_version

# Lower bound only: this does not pin an exact Snakemake release.
min_version("7.21.0")


################
## CONFIG FILE
################


# Populates the global `config` dictionary that the rest of this file reads
# (config["globals"], config["genome"], config["deseq2"], ...).
configfile: "config/config.yaml"


#############################
## INCLUDE COMMON FUNCTIONS
#############################


# Included before the variable definitions below: get_filename(), mkdir() and
# build_rule_all_inputs are used in this file but not defined in it
# (presumably they come from rules/common.smk -- confirm before reordering).
include: "rules/common.smk"


###########################
## DEFINE PIPELINE CONTAINER
###########################


# Container image declared for the whole workflow.
# NOTE(review): the image tag is hard-coded here; confirm it matches the
# image the pipeline was last validated against when bumping versions.
containerized: "docker://ftabaro/rna-seq:1.3"


#######################
## DEFINE VARIABLES
#######################


# Logs go into a sub-folder named after the date (YYYY-MM-DD) on which this
# Snakefile is evaluated, below the configured log folder.
date = datetime.now()
timestamp = f"{date:%Y-%m-%d}"
log_folder = Path(config["globals"]["log_folder"]) / timestamp

# Base folders, each read from the "globals" section of the configuration.
data_folder = Path(config["globals"]["results_folder"])
qc_folder = Path(config["globals"]["qc_folder"])
reads_folder = Path(config["globals"]["reads_folder"])
references_folder = Path(config["globals"]["references_folder"])
tmp_folder = Path(config["globals"]["tmp_folder"])
analysis_folder = Path(config["globals"]["analysis_folder"])

# Reads: raw reads live directly in the reads folder (same path, second name);
# trimmed reads go below the results folder.
raw_reads_folder = reads_folder
trim_reads_folder = data_folder / "trim"

# Downstream analysis outputs.
rdata_folder = analysis_folder / "rdata"
pictures_folder = analysis_folder / "pictures"
tables_folder = analysis_folder / "tables"
notebooks_folder = analysis_folder / "notebooks"

# Per-stage QC output folders.
fastqc_raw_folder = qc_folder / "fastqc-raw"
fastqc_trim_folder = qc_folder / "fastqc-trimmed"
fastqc_markdup_folder = qc_folder / "fastqc-markdup"
fastqc_star_folder = qc_folder / "star"

# MultiQC reports, one sub-folder per stage.
multiqc_folder = qc_folder / "multiqc"
multiqc_raw_folder = multiqc_folder / "raw"
multiqc_trim_folder = multiqc_folder / "trim"
multiqc_star_folder = multiqc_folder / "star"
multiqc_markdup_folder = multiqc_folder / "markdup"

# Alignment and quantification outputs below the results folder.
alignments_folder = data_folder / "alignments"
star_folder = alignments_folder / "star"
markdup_folder = alignments_folder / "star_markdup"
starTE_folder = alignments_folder / "starTE"
salmonTE_folder = data_folder / "salmonTE"
trna_coverage_folder = data_folder / "tRNA_coverage"

# DESeq2 has its own working directory, configured in the "deseq2" section.
deseq2_working_directory = Path(config["deseq2"]["working_directory"])
# deseq2_notebook_input_path = Path(config["deseq2"]["notebook_path"])

## Local paths of the reference files, derived from their download URLs.
# get_filename() is not defined in this file (presumably rules/common.smk);
# decompress=True presumably yields the name of the decompressed file --
# TODO confirm against its definition.
fasta_url = config["genome"]["fasta_url"]
fasta_path = references_folder / get_filename(fasta_url, decompress=True)

gtf_url = config["genome"]["gtf_url"]
gtf_path = references_folder / get_filename(gtf_url, decompress=True)

# Sub-folders of the references folder for additional annotations.
rmsk_folder = references_folder / "rmsk"
tRNA_annotation_dir = references_folder / "gtrnadb"

## Get samples
# Split the configured sequencing libraries by protocol. Each entry of
# config["sequencing_libraries"] must provide a "name" and a "protocol" key.
# The two lists feed the `se_serie` / `pe_serie` wildcard constraints below.
libraries = config["sequencing_libraries"]

library_names_single = []
library_names_paired = []

for library in libraries:
    libname = library["name"]
    protocol = library["protocol"]
    if protocol == "pe":
        library_names_paired.append(libname)
    else:
        # Every protocol other than "pe" is treated as single-end.
        # (Previously these names were also appended to the paired list,
        # which left library_names_single permanently empty.)
        library_names_single.append(libname)

# Make sure config["genome"]["selected_chromosomes"] always exists: when the
# user did not select specific chromosomes it defaults to None, which stands
# for "download all chromosomes".
config["genome"].setdefault("selected_chromosomes", None)

####################
## ONSTART BLOCK
####################

## These folders will be created at the beginning of the pipeline
# Folders handed to mkdir() by the onstart handler. Only a subset of the
# folders defined above is listed here.
init_folders = [
    # logs and reads (raw_reads_folder is the same path as reads_folder)
    log_folder,
    reads_folder,
    raw_reads_folder,
    trim_reads_folder,
    # QC reports, alignments, references and scratch space
    multiqc_folder,
    alignments_folder,
    references_folder,
    tmp_folder,
    # aligner / quantifier outputs
    star_folder,
    starTE_folder,
    salmonTE_folder,
    # analysis outputs
    tables_folder,
    pictures_folder,
    notebooks_folder,
    # FastQC on raw reads
    fastqc_raw_folder,
]


onstart:
    # Executed by Snakemake before the workflow starts: prepare the output
    # tree, then print the effective configuration as formatted JSON.
    from json import dumps
    from rich import print_json

    # mkdir() is not defined in this file (presumably rules/common.smk).
    for directory in init_folders:
        mkdir(directory)

    config_as_json = dumps(config)
    print_json(config_as_json)


#############
## WORKFLOW
#############


# Global regular-expression constraints for the wildcards used by the rules.
# NOTE(review): library names are joined verbatim (no re.escape), so regex
# metacharacters in a name are interpreted as regex syntax; an empty name
# list produces an empty pattern.
wildcard_constraints:
    # any configured library name
    serie="|".join(library_names_single + library_names_paired),
    # names collected in library_names_single
    se_serie="|".join(library_names_single),
    # names collected in library_names_paired
    pe_serie="|".join(library_names_paired),
    # exactly "multihit" or "random"
    method="multihit|random",
    # "_" + M or R + 1 or 2, optionally followed by "_sequence"
    mate=r"_[MR][12](?:_sequence)?",


# Rule modules. They are included after the folder variables and wildcard
# constraints above have been defined -- presumably because the rules
# reference them; confirm before moving these lines.
include: "rules/download-references.smk"
include: "rules/fastqc.smk"
include: "rules/trim_single.smk"
include: "rules/star.smk"
include: "rules/deseq2.smk"
include: "rules/salmonTE.smk"
include: "rules/starTE.smk"
include: "rules/filter_bam.smk"
include: "rules/picard_markdup.smk"
include: "rules/make_bw.smk"
include: "rules/coverage_tRNA.smk"


# Aggregate target rule: it has no outputs of its own and only requests the
# files returned by build_rule_all_inputs (not defined in this file;
# presumably rules/common.smk). As the only rule defined directly in this
# Snakefile, it is expected to be the default target -- see the Snakemake
# documentation on includes and the default target rule.
rule all:
    input:
        build_rule_all_inputs,
